
import numpy as np
import os 
import datetime
from textblob import TextBlob
from wordcloud import WordCloud 
import pandas as pd  
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

# Load the Alabama dataset from a CSV file in the current working directory.
df = pd.read_csv('./Alabama_data_0302.csv')

# sentiment analysis
# NOTE: the TextBlob scoring below is commented out, while later statements read
# df['Polarity'] and df['Subjectivity'] -- so those two columns are presumably
# already stored in the CSV (TODO confirm).
# def getTextSubjectivity(txt):
#     return TextBlob(txt).sentiment.subjectivity

# def getTextPolarity(txt):
#     return TextBlob(txt).sentiment.polarity

# df['Subjectivity'] = df['description'].apply(getTextSubjectivity)  
# df['Polarity'] = df['description'].apply(getTextPolarity)  


# Turn the numeric polarity and subjectivity scores into categorical labels
def getTextAnalysis(a):
    """Map a polarity score to a sentiment label.

    Returns "Positive" for scores of 0.1 or more, "Negative" for scores of
    -0.1 or less, and "Neutral" for everything else (including NaN, for which
    both comparisons are false).

    NOTE: the same function name is defined again further down this file, so
    this version is only in effect for the statements in between.
    """
    return "Positive" if a >= 0.1 else ("Negative" if a <= -0.1 else "Neutral")
# Label every row's polarity score and store the label in a new 'Score' column.
df['Score'] = df['Polarity'].apply(getTextAnalysis)
def getTextAnalysis(a):
    """Map a subjectivity score to a label.

    Returns "Subjective" for scores of 0.4 or more, "Objective" for scores of
    0.2 or less, and "Normal" for everything else (including NaN, for which
    both comparisons are false).

    NOTE: this definition re-uses the name of the polarity classifier defined
    earlier in the file and replaces it from this point on.
    """
    if a >= 0.4:
        return "Subjective"
    if a <= 0.2:
        return "Objective"
    return "Normal"
# Label every row's subjectivity score and store the label in 'Score2'.
df['Score2'] = df['Subjectivity'].apply(getTextAnalysis)
# Parse the timestamp column so the `.dt` accessor can be used below.
df['datetime'] = pd.to_datetime(df['datetime'])
# Select the rows attributed to environmental accidents so they can be removed
# Accident of 2019 in Cullman and Walker counties
Cullman = df[df['County'].str.contains('CULLMAN')]
Cullman = Cullman[Cullman.datetime.dt.year == 2019]

# Positional slice: keeps rows 9..96 of the 2019 Cullman rows (file order).
# presumably these are the rows inside the accident period -- TODO confirm
Cullman = Cullman[9:97]

WALKER = df[df['County'].str.contains('WALKER')]
WALKER = WALKER[WALKER.datetime.dt.year == 2019]

# Positional slice: keeps rows 8..77 of the 2019 Walker rows (file order).
WALKER = WALKER[8:78]        

# Anti-join: after concatenation the selected rows occur twice, and
# drop_duplicates(keep=False) discards every row that occurs more than once.
# NOTE(review): rows that are already exact duplicates inside df are removed too.
df2 = pd.concat([df, Cullman, WALKER]).drop_duplicates(keep=False)   # df2 drops the four months of data from May to August
# Accident of 2021 in Perry county
Perry = df[df['County'].str.contains('PERRY')]
Perry = Perry[Perry.datetime.dt.year == 2021]
# Keep every 2021 Perry row except the first one (file order).
Perry = Perry[1:]


# Same anti-join as above, now removing the selected Perry rows from df2.
df2 = pd.concat([df2, Perry]).drop_duplicates(keep=False)  

# ---------------

# Selection of extreme periods (each window is open on both ends)
df2 = df2.set_index(['datetime'])


def _select_period(frame, after, before):
    """Return the rows of *frame* whose index lies strictly between two dates."""
    stamps = frame.index
    lower = pd.to_datetime(after)
    upper = pd.to_datetime(before)
    return frame.loc[(stamps > lower) & (stamps < upper)]


FloodA2 = _select_period(df2, "2015-9-30", "2016-5-1")
FloodA3 = _select_period(df2, "2018-7-31", "2019-4-1")

DroughtB2 = _select_period(df2, "2016-4-30", "2017-5-1")
C = _select_period(df2, "2020-1-1", "2021-1-1")
C2021 = _select_period(df2, "2021-1-1", "2022-1-1")

# Split the cleaned data by polarity label and by subjectivity label
_polarity_label = df2['Score']
Positive = df2[_polarity_label.str.contains("Positive")]
Negative = df2[_polarity_label.str.contains("Negative")]
Neutral = df2[_polarity_label.str.contains("Neutral")]

_subjectivity_label = df2['Score2']
Objective = df2[_subjectivity_label.str.contains("Objective")]
Subjective = df2[_subjectivity_label.str.contains("Subjective")]
Normal = df2[_subjectivity_label.str.contains("Normal")]

# Print the (rows, columns) shape of every subset, then of the whole frame
for _subset in (Positive, Negative, Neutral, Objective, Subjective, Normal, df2):
    print(_subset.shape)
# PDF figure: relative-frequency histograms of polarity (left) and subjectivity (right)

data13 = df2.Subjectivity # subjectivity scores of the cleaned data set
res_freq13 = stats.relfreq(data13, numbins=40, defaultreallimits= (0,1)) # numbins is the number of bins spanning the given limits

# Relative frequency of each bin.
pdf_value13 = res_freq13.frequency

# Running sum of the relative frequencies (not used by the plotting code in this section).
cdf_value13 = np.cumsum(res_freq13.frequency)

# x positions passed to bar() as left edges.
# NOTE(review): linspace places `size` points over [0, binsize*size], so the
# spacing is binsize*size/(size-1), slightly wider than binsize; the bars drift
# to the right of the true bin edges. `lowerlimit + binsize*np.arange(size)`
# would match the bins exactly -- the hand-tuned span limits below (e.g. 1.025)
# look like they compensate for this; confirm before changing either.
x13 = res_freq13.lowerlimit + np.linspace(0, res_freq13.binsize * res_freq13.frequency.size, res_freq13.frequency.size)
#-----------------------------------------------------------------------
data14 = df2.Polarity # polarity scores of the cleaned data set
res_freq14 = stats.relfreq(data14, numbins=40, defaultreallimits= (-1,1)) # numbins is the number of bins spanning the given limits

# Relative frequency of each bin.
pdf_value14 = res_freq14.frequency

# Running sum of the relative frequencies (not used by the plotting code in this section).
cdf_value14 = np.cumsum(res_freq14.frequency)

# Same x-position construction (and the same caveat) as x13 above.
x14 = res_freq14.lowerlimit + np.linspace(0, res_freq14.binsize * res_freq14.frequency.size, res_freq14.frequency.size)
#-----------------------------------------------------------------------
# Create the figure; the second call only re-selects figure number 1.
plt.figure(figsize=(25, 10))
plt.figure(1)


# Left panel: polarity histogram with two shaded x-ranges.
ax6 = plt.subplot(121)
ax6.axvspan(0.08, 1.05, color='lightblue', alpha=0.3, lw=0)
ax6.axvspan(-0.13, -1, color='#FFC1C1', alpha=0.2, lw=0)

ax6.bar(x14, pdf_value14, width=res_freq14.binsize,color="#FF8000",alpha=0.7,edgecolor="black", label='Polarity',align='edge')
#ax6.bar(x6, pdf_value6, width=res_freq6.binsize,color="#308014",alpha=0.5,edgecolor="black",label='C',align='edge')
ax6.set_xticks(np.arange(-1, 1.1, 0.2)) 
#ax6.set_yticks(np.arange(0, 0.4, 0.05))
ax6.tick_params(labelsize=25)
#ax6.set_title('Polairty', fontsize = 25)
# The legend is built here, before the dashed lines below are added, so their
# 'test lines' labels do not show up in it.
ax6.legend(fontsize = 25)
# Dashed lines on the borders of the two shaded ranges.
ax6.axvline(x=-0.13, ymin=0, ymax=3, color='r', linewidth = 1, ls='--',label='test lines')
ax6.axvline(x=-1, ymin=0, ymax=3, color='r', linewidth = 1, ls='--',label='test lines')
ax6.axvline(x=0.08, ymin=0, ymax=3, color='b', linewidth = 1, ls='--',label='test lines')
ax6.axvline(x=1.05, ymin=0, ymax=3, color='b', linewidth = 1, ls='--',label='test lines')

# ax6 is still the current axes here, so this labels the left panel.
plt.ylabel('Probability density function', fontsize = 28)
# Right panel: subjectivity histogram with two shaded x-ranges.
ax12 = plt.subplot(122)
ax12.axvspan(0, 0.204, color='lightblue', alpha=0.3, lw=0)
ax12.axvspan(0.408, 1.025, color='#FFC1C1', alpha=0.2, lw=0)
ax12.bar(x13, pdf_value13, width=res_freq13.binsize,color="#1874CD",alpha=0.7,edgecolor="black",label='Subjectivity',align='edge')
ax12.set_xticks(np.arange(0, 1, 0.1))  
ax12.tick_params(labelsize=25)
ax12.legend(fontsize=25)
# Dashed lines on the borders of the two shaded ranges.
ax12.axvline(x=0.204, ymin=0, ymax=3, color='b', ls='--',linewidth = 1, label='test lines')
ax12.axvline(x=0, ymin=0, ymax=3, color='b', ls='--',linewidth = 1, label='test lines')
ax12.axvline(x=0.408, ymin=0, ymax=3, color='r', ls='--',linewidth = 1, label='test lines')
ax12.axvline(x=1.025, ymin=0, ymax=3, color='r', ls='--',linewidth = 1, label='test lines')
# Hard-coded percentage annotations. All four are drawn on the current axes
# (ax12) in its data coordinates; the two with negative x lie left of ax12's
# bars -- presumably positioned by hand to sit over the left panel (TODO confirm).
plt.text(0.55,0.1,'(S): 32.25%', fontsize=25)
plt.text(-0.05,0.1,'(O): 26.29%', fontsize=25)
plt.text(-0.75,0.1,'(+): 11.97%',  fontsize=25)
plt.text(-1.25,0.1,'(—): 34.02%', fontsize=25)
# -----------------------------------------------------------------
# Top 15 counties and the other 52 counties
top_15 = pd.read_csv('./top15.csv')
top_15['datetime'] = pd.to_datetime(top_15['datetime'])
other_52 = pd.read_csv('./other_52.csv')
other_52['datetime'] = pd.to_datetime(other_52['datetime'])

# Only the top-15 frame is indexed by timestamp; other_52 keeps its
# 'datetime' column and default index.
top_15 = top_15.set_index(['datetime'])


def _slice_period(frame, after, before):
    """Return the rows of *frame* whose index lies strictly between two dates."""
    stamps = frame.index
    lower = pd.to_datetime(after)
    upper = pd.to_datetime(before)
    return frame.loc[(stamps > lower) & (stamps < upper)]


# The period names are rebound here to subsets of the top-15 counties.
FloodA2 = _slice_period(top_15, "2015-9-30", "2016-5-1")
FloodA3 = _slice_period(top_15, "2018-7-31", "2019-4-1")

DroughtB2 = _slice_period(top_15, "2016-4-30", "2017-5-1")
C = _slice_period(top_15, "2020-1-1", "2021-1-1")
C2021 = _slice_period(top_15, "2021-1-1", "2022-1-1")
# Bootstrapping for polarity
def bootstrap_polarity(func, total=None, n_boot=10000, seed=None):
    """Bootstrap the positive/negative share of a period against a reference pool.

    Each replicate draws ``len(func)`` rows with replacement from ``func`` and
    the same number of rows with replacement from ``total``, counts the scores
    that are positive (``>= 0.1``) and negative (``<= -0.1``) in both draws,
    and records the period share divided by the reference share.

    Parameters
    ----------
    func : DataFrame-like
        Rows of the period under study; must expose a ``Polarity`` column.
        (The parameter name is historical -- it is data, not a callable.)
    total : DataFrame-like, optional
        Reference rows with a ``Polarity`` column.  Defaults to the
        module-level ``top_15`` frame, which the original code always used.
    n_boot : int, optional
        Number of bootstrap replicates (default 10000).
    seed : int, optional
        Seed for a private random generator.  When omitted, the global
        ``np.random`` state is used, exactly as before.

    Returns
    -------
    tuple
        ``(pos_low, pos_median, pos_high, neg_low, neg_median, neg_high)``:
        the 2.5th, 50th and 97.5th percentile of the positive ratio followed
        by the same percentiles of the negative ratio, rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``n_boot`` is smaller than 1 or either input has no rows.

    Notes
    -----
    A replicate whose reference draw contains no hit yields ``inf`` (or
    ``nan`` for 0/0) instead of raising ``ZeroDivisionError``; ``np.sort``
    puts those values at the upper end of the sorted ratios.
    """
    if total is None:
        total = top_15
    if n_boot < 1:
        raise ValueError("n_boot must be at least 1")

    # Plain arrays make every lookup positional, whatever index the frames
    # carry (datetime index, filtered integer labels, ...).
    polar = np.asarray(func.Polarity, dtype=float)
    polar_total = np.asarray(total.Polarity, dtype=float)
    n = polar.size
    n_total = polar_total.size
    if n == 0 or n_total == 0:
        raise ValueError("bootstrap_polarity needs at least one row in each input")

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Threshold once; each replicate then only counts the resampled flags.
    is_pos, is_neg = polar >= 0.1, polar <= -0.1
    is_pos_total, is_neg_total = polar_total >= 0.1, polar_total <= -0.1

    pos_hits = np.empty(n_boot, dtype=float)
    neg_hits = np.empty(n_boot, dtype=float)
    pos_hits_total = np.empty(n_boot, dtype=float)
    neg_hits_total = np.empty(n_boot, dtype=float)
    for i in range(n_boot):
        # Same draw order as the original: period sample first, then a
        # reference sample of the SAME size as the period.
        wk = rng.randint(0, n, n)
        wt = rng.randint(0, n_total, n)
        pos_hits[i] = is_pos[wk].sum()
        neg_hits[i] = is_neg[wk].sum()
        pos_hits_total[i] = is_pos_total[wt].sum()
        neg_hits_total[i] = is_neg_total[wt].sum()

    with np.errstate(divide="ignore", invalid="ignore"):
        newp = np.sort((pos_hits / n) / (pos_hits_total / n))
        newn = np.sort((neg_hits / n) / (neg_hits_total / n))

    # Integer arithmetic keeps the indices at exactly 250 / 5000 / 9750 for
    # the default of 10000 replicates.
    low = n_boot * 25 // 1000
    mid = n_boot // 2
    high = n_boot * 975 // 1000
    return (newp[low].round(4), newp[mid].round(4), newp[high].round(4),
            newn[low].round(4), newn[mid].round(4), newn[high].round(4))



# Bootstrap the polarity ratios for the three selected periods (same call
# order as before, so the random draws are consumed in the same sequence).
_polarity_flood_a2 = bootstrap_polarity(FloodA2)
_polarity_drought_b2 = bootstrap_polarity(DroughtB2)
_polarity_flood_a3 = bootstrap_polarity(FloodA3)

# Each result is (pos_low, pos_median, pos_high, neg_low, neg_median, neg_high).
(pos_low2, pos_med2, pos_hig2,
 neg_low2, neg_med2, neg_hig2) = _polarity_flood_a2
(pos_low3, pos_med3, pos_hig3,
 neg_low3, neg_med3, neg_hig3) = _polarity_drought_b2
(pos_low4, pos_med4, pos_hig4,
 neg_low4, neg_med4, neg_hig4) = _polarity_flood_a3

# One line of six space-separated numbers per period.
for _summary in (_polarity_flood_a2, _polarity_drought_b2, _polarity_flood_a3):
    print(*_summary)

#Bootstrapping for subjectivity
def bootstrap_subjectivity(func, total=None, n_boot=10000, seed=None):
    """Bootstrap the objective/subjective share of a period against a reference pool.

    Each replicate draws ``len(func)`` rows with replacement from ``func`` and
    the same number of rows with replacement from ``total``, counts the scores
    that are subjective (``>= 0.4``) and objective (``<= 0.2``) in both draws,
    and records the period share divided by the reference share.

    Parameters
    ----------
    func : DataFrame-like
        Rows of the period under study; must expose a ``Subjectivity`` column.
        (The parameter name is historical -- it is data, not a callable.)
    total : DataFrame-like, optional
        Reference rows with a ``Subjectivity`` column.  Defaults to the
        module-level ``top_15`` frame, which the original code always used.
    n_boot : int, optional
        Number of bootstrap replicates (default 10000).
    seed : int, optional
        Seed for a private random generator.  When omitted, the global
        ``np.random`` state is used, exactly as before.

    Returns
    -------
    tuple
        ``(obj_low, obj_median, obj_high, sub_low, sub_median, sub_high)``:
        the 5th, 50th and 95th percentile of the objective ratio followed by
        the same percentiles of the subjective ratio, rounded to 4 decimals.
        Note the objective values come FIRST, and the interval is narrower
        than the 2.5th/97.5th percentiles used by ``bootstrap_polarity``.

    Raises
    ------
    ValueError
        If ``n_boot`` is smaller than 1 or either input has no rows.

    Notes
    -----
    A replicate whose reference draw contains no hit yields ``inf`` (or
    ``nan`` for 0/0) instead of raising ``ZeroDivisionError``; ``np.sort``
    puts those values at the upper end of the sorted ratios.
    """
    if total is None:
        total = top_15
    if n_boot < 1:
        raise ValueError("n_boot must be at least 1")

    # Plain arrays make every lookup positional, whatever index the frames
    # carry (datetime index, filtered integer labels, ...).
    scores = np.asarray(func.Subjectivity, dtype=float)
    scores_total = np.asarray(total.Subjectivity, dtype=float)
    n = scores.size
    n_total = scores_total.size
    if n == 0 or n_total == 0:
        raise ValueError("bootstrap_subjectivity needs at least one row in each input")

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Threshold once; each replicate then only counts the resampled flags.
    is_sub, is_obj = scores >= 0.4, scores <= 0.2
    is_sub_total, is_obj_total = scores_total >= 0.4, scores_total <= 0.2

    sub_hits = np.empty(n_boot, dtype=float)
    obj_hits = np.empty(n_boot, dtype=float)
    sub_hits_total = np.empty(n_boot, dtype=float)
    obj_hits_total = np.empty(n_boot, dtype=float)
    for i in range(n_boot):
        # Same draw order as the original: period sample first, then a
        # reference sample of the SAME size as the period.
        wk = rng.randint(0, n, n)
        wt = rng.randint(0, n_total, n)
        sub_hits[i] = is_sub[wk].sum()
        obj_hits[i] = is_obj[wk].sum()
        sub_hits_total[i] = is_sub_total[wt].sum()
        obj_hits_total[i] = is_obj_total[wt].sum()

    with np.errstate(divide="ignore", invalid="ignore"):
        news = np.sort((sub_hits / n) / (sub_hits_total / n))
        newo = np.sort((obj_hits / n) / (obj_hits_total / n))

    # Integer arithmetic keeps the indices at exactly 500 / 5000 / 9500 for
    # the default of 10000 replicates.
    low = n_boot * 5 // 100
    mid = n_boot // 2
    high = n_boot * 95 // 100
    return (newo[low].round(4), newo[mid].round(4), newo[high].round(4),
            news[low].round(4), news[mid].round(4), news[high].round(4))
    


# Bootstrap the subjectivity ratios for the same three periods.
# NOTE: bootstrap_subjectivity returns the objective percentiles first and the
# subjective percentiles second, so despite their names the pos_* variables
# receive the objective values and the neg_* variables the subjective values.
# These assignments also overwrite the polarity results stored under the same
# names above.
pos_low2,pos_med2,pos_hig2,neg_low2,neg_med2,neg_hig2 = bootstrap_subjectivity(FloodA2)
pos_low3,pos_med3,pos_hig3,neg_low3,neg_med3,neg_hig3 = bootstrap_subjectivity(DroughtB2)
pos_low4,pos_med4,pos_hig4,neg_low4,neg_med4,neg_hig4 = bootstrap_subjectivity(FloodA3)

# The bootstrapping for the other 52 counties follows the same procedure.

# Plot figures
# After correction: "total" is drawn from all comments
#
# Error-bar figure of the polarity ratios for three periods. All numbers are
# hard-coded (presumably copied from bootstrap runs -- TODO confirm). Each
# error list is [[lower errors], [upper errors]], one entry per period.

label = ('Total', 'F-2016', 'F-2016', 'D-2017', 'D-2017', 'F-2019')  # not used below
positive = [0.7222, 0.6393, 0.8333]
dyy = [[0.2677, 0.2107, 0.257], [0.4028, 0.2925, 0.3667]]

total_positive = [0.9, 0.5714, 0.6786]
dy = [[0.4, 0.2259, 0.3082], [0.7, 0.3397, 0.5089]]

negative = [1.1818, 1.4079, 1.2761]
dzz = [[0.2192, 0.2079, 0.1992], [0.2756, 0.2588, 0.2388]]

total_negative = [1.2195, 1.359, 1.2644]
dz = [[0.2518, 0.1967, 0.2376], [0.3215, 0.2473, 0.3038]]

# One x slot per period; the four series are offset around each slot.
x = np.arange(0, 3, 1)

# A single figure is created here. The extra plt.figure(figsize=(16,14)) call
# that used to precede this line only produced an empty, unused figure.
fig, ax = plt.subplots(figsize=(22, 7))

ax.errorbar(x - 0.3,
            positive,
            yerr=dyy,
            fmt='^', ms=12, color='b', ecolor='b', elinewidth=3,
            label='Positive(15 counties)',
            capsize=1, barsabove=2)
ax.errorbar(x - 0.15,
            total_positive,
            yerr=dy,
            fmt='.', ms=15, color='b', ecolor='b', elinewidth=3,
            label='Positive(52 counties)')
# Legend spelling fixed ('Negtive' -> 'Negative') to match the Covid figure.
ax.errorbar(x + 0.15,
            negative,
            yerr=dzz,
            fmt='v', ms=12, color='r', ecolor='r', elinewidth=3,
            label='Negative(15 counties)',
            capsize=1, barsabove=2)
ax.errorbar(x + 0.3,
            total_negative,
            yerr=dz,
            fmt='x', ms=12, color='r', ecolor='r', elinewidth=3,
            label='Negative(52 counties)',
            capsize=1, barsabove=2)

plt.ylabel('Percentage Ratio [-]', fontsize=25)
#plt.xlabel('Polarity', fontsize = 25)
# A near-zero font size effectively hides the numeric x tick labels; the
# period names are written with plt.text below instead.
plt.xticks(fontsize=0.01)
plt.yticks((0.4, 0.8, 1.2, 1.6, 2.0), fontsize=25)

plt.grid(axis='y', ls=':', lw=3)
plt.legend(mode='expand', loc='upper center', labelspacing=0, fontsize=18,
           markerscale=1, ncol=4, borderaxespad=0.6, handletextpad=0.2,
           borderpad=0.4, framealpha=0.1, columnspacing=-1)

# Dashed separators between the three period slots.
plt.axvline(x=0.5, color='black', linestyle='--')
plt.axvline(x=1.5, color='black', linestyle='--')
plt.text(-0.1, 0.13, 'F-2016', fontsize=25)
plt.text(0.9, 0.13, 'D-2017', fontsize=25)
plt.text(1.9, 0.13, 'F-2019', fontsize=25)
# Reference line at ratio 1.
plt.axhline(y=1, color='black', linestyle='-')

#plt.savefig('../Figure3a_polarity.eps')

# Plot figure for subjectivity
#
# Error-bar figure of the objective/subjective ratios for three periods. All
# numbers are hard-coded (presumably copied from bootstrap runs -- TODO
# confirm). Each error list is [[lower errors], [upper errors]].
label = ('Total', 'F-2016', 'F-2016', 'D-2017', 'D-2017', 'F-2019')  # not used below
objective = [1.186, 0.876, 0.9339]
dyy = [[0.2085, 0.1487, 0.1578], [0.2664, 0.1757, 0.1934]]

total_objective = [0.746, 0.9905, 0.75]
dy = [[0.196, 0.178, 0.1821], [0.254, 0.2154, 0.2328]]

subjective = [0.8585, 0.8099, 0.9444]
dzz = [[0.1526, 0.1297, 0.1394], [0.1864, 0.1463, 0.1675]]

total_subjective = [0.8767, 0.8516, 0.8272]
dz = [[0.1892, 0.1441, 0.175], [0.244, 0.1658, 0.2133]]

# One x slot per period; the four series are offset around each slot.
x = np.arange(0, 3, 1)

# A single figure is created here. The extra plt.figure(figsize=(16,14)) call
# that used to precede this line only produced an empty, unused figure.
fig, ax = plt.subplots(figsize=(22, 7))

# NOTE: `objective` and `subjective` are rebound from the data lists to the
# containers returned by errorbar, as in the original code.
objective = ax.errorbar(x - 0.3,
                        objective,
                        yerr=dyy,
                        fmt='^', ms=12, color='b', ecolor='b', elinewidth=3,
                        label='Objective(15 counties)',
                        capsize=1, barsabove=2)
ax.errorbar(x - 0.15,
            total_objective,
            yerr=dy,
            fmt='.', ms=14, color='b', ecolor='b', elinewidth=3,
            label='Objective(52 counties)')
subjective = ax.errorbar(x + 0.15,
                         subjective,
                         yerr=dzz,
                         fmt='v', ms=12, color='r', ecolor='r', elinewidth=3,
                         label='Subjective(15 counties)',
                         capsize=1, barsabove=2)
ax.errorbar(x + 0.3,
            total_subjective,
            yerr=dz,
            fmt='x', ms=12, color='r', ecolor='r', elinewidth=3,
            label='Subjective(52 counties)',
            capsize=1, barsabove=2)

plt.ylabel('Percentage Ratio [-]', fontsize=25)
#plt.xlabel('Subjectivity', fontsize = 25)
#plt.set_xticklabels(label,  fontsize = 30)
#ax.tick_params(labelsize=25)
plt.grid(axis='y', ls=':', lw=3)
plt.legend(mode='expand', loc='upper center', labelspacing=0, fontsize=18,
           markerscale=1, ncol=4, borderaxespad=0.6, handletextpad=0.2,
           borderpad=0.4, framealpha=0.1, columnspacing=-1)
#ax.legend(loc='upper right',  bbox_to_anchor=(0.15, 1),fontsize=15,markerscale=0.9)
plt.yticks((0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6), fontsize=25)
# A near-zero font size effectively hides the numeric x tick labels; the
# period names are written with plt.text below instead.
plt.xticks(fontsize=0.01)
# Dashed separators between the three period slots.
plt.axvline(x=0.5, color='black', linestyle='--')
plt.axvline(x=1.5, color='black', linestyle='--')
#plt.axvline(x=2.5, color='black', linestyle='--')
plt.text(-0.1, 0.31, 'F-2016', fontsize=25)
plt.text(0.9, 0.31, 'D-2017', fontsize=25)
plt.text(1.9, 0.31, 'F-2019', fontsize=25)
# Reference line at ratio 1.
plt.axhline(y=1, lw=2., color='black', linestyle='-')

# Selection of the Covid-2020 and Covid-2021 periods
# Re-read both county groups so each has a plain 'datetime' column again
# (top_15 was indexed by it earlier), and parse that column so the `.dt`
# accessor used below works. The second read previously called the
# non-existent `pd.read_cs` and stored its result under an unused name.
top_15 = pd.read_csv('./top15.csv')
top_15['datetime'] = pd.to_datetime(top_15['datetime'])
other_52 = pd.read_csv('./other_52.csv')
other_52['datetime'] = pd.to_datetime(other_52['datetime'])


# Rows of each calendar year, top-15 counties
C_2020_top_15 = top_15[top_15.datetime.dt.year == 2020]
C_2021_top_15 = top_15[top_15.datetime.dt.year == 2021]

#----------------------
# Rows of each calendar year, other 52 counties
C_2020_other_52 = other_52[other_52.datetime.dt.year == 2020]
C_2021_other_52 = other_52[other_52.datetime.dt.year == 2021]
# Index the yearly subsets by timestamp before running the bootstrapping code
# (the original author notes that it errors otherwise).
C_2020_top_15 = C_2020_top_15.set_index('datetime')
C_2021_top_15 = C_2021_top_15.set_index('datetime')
#----------------------
C_2020_other_52 = C_2020_other_52.set_index('datetime')
C_2021_other_52 = C_2021_other_52.set_index('datetime')

# run bootstrapping code and get the CI
# plot figure for polarity
# NOTE: the variables below are named objective/subjective, but they are drawn
# with 'Positive'/'Negative' legend labels, i.e. they hold the polarity values
# for this figure. All numbers are hard-coded (presumably copied from bootstrap
# runs -- TODO confirm). Each error list is [[lower errors], [upper errors]].
label =('Total','C-2020','1','C-2021')
objective = [1.3146
,1.3663]
dyy = [[0.295
,0.2703],[0.4054
,0.348]]

total_objective = [1.2885
,1.5517]
dy = [[0.3666
,0.4128],[0.5158
,0.6116]]


subjective = [0.7018
,0.7345]
dzz = [[0.1131
,0.1078],[0.1315
,0.1193]]

total_subjective = [0.7273
,0.7062]
dz = [[0.1253
,0.1215],[0.1454
,0.1435]]


# One x slot per year; the four series are offset to the right of each slot.
x = np.arange(0,2,1)
fig, ax = plt.subplots(figsize=(10,10))
       
# `objective` / `subjective` are rebound from the data lists to the containers
# returned by errorbar.
objective = ax.errorbar(x+0.2, 
             objective, 
             yerr=dyy,
             fmt='^', ms = 12,color = 'b', ecolor='b',elinewidth=3,label= 'Positive(15 counties)',
            capsize=1,barsabove=2)
ax.errorbar(x+0.35, 
             total_objective, 
             yerr=dy,
             fmt='.', ms = 16,color = 'b', ecolor='b',elinewidth=3,label= 'Positive(52 counties)',
            )
subjective = ax.errorbar(x+0.8, 
             subjective, 
             yerr=dzz,
             fmt='v', ms = 12,color = 'r', ecolor='r',elinewidth=3,label= 'Negative(15 counties)',
            capsize=1,barsabove=2)
ax.errorbar(x+0.95, 
             total_subjective, 
             yerr=dz,
             fmt='x', ms = 12,color = 'r', ecolor='r',elinewidth=3,label= 'Negative(52 counties)',
            capsize=1,barsabove=2)

ax.set_ylabel('Percentage Ratio [-]', fontsize =25)
#ax.set_xticklabels(label,fontsize = 30)
#ax.tick_params(labelsize=25)
# NOTE(review): here the tick font size is set BEFORE ax.set_yticks replaces
# the tick positions, whereas the subjectivity figure below does it the other
# way round -- check that the y tick labels render at the intended size.
plt.yticks(fontsize=25)
# A near-zero font size effectively hides the numeric x tick labels.
plt.xticks(fontsize=0.01)
ax.grid(axis='y',ls=':',lw=3)
ax.legend( mode='expand',loc='lower center', labelspacing=1,fontsize=18,markerscale=1, ncol=2,borderaxespad=0.6,
         handletextpad=0.2,borderpad=0.4, framealpha=0.1,columnspacing=1)
ax.set_yticks((0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0,2.2))
# Year names written as text in data coordinates.
plt.text(0.45,0.1,'C-2020',fontsize=25)
plt.text(1.45,0.1,'C-2021',fontsize=25)
# Vertical separator between the two years and a reference line at ratio 1.
plt.axvline(x=1.1, color='grey', linestyle='-')
plt.axhline(y=1,lw=2., color='black', linestyle='-')
#plt.savefig('../Figure4a_polarity.eps')

# plot figure for subjectivity
# Error-bar figure of the objective/subjective ratios for the two Covid years.
# All numbers are hard-coded (presumably copied from bootstrap runs -- TODO
# confirm). Each error list is [[lower errors], [upper errors]].
label =('1','C-2020','2','C-2021')
objective = [0.9148,1.0424]
dyy = [[0.1283,0.1254],[0.1481,0.1448]]

total_objective = [0.9621,0.9014]
dy = [[0.1549,0.1496],[0.186,0.173]]


subjective = [1.2332,1.2388]
dzz = [[0.1384,0.1223],[0.1544,0.1376]]

total_subjective = [1.1534,1.3007]
dz = [[0.1534,0.1618],[0.1754,0.1993]]


# One x slot per year; the four series are offset to the right of each slot.
x = np.arange(0,2,1)
fig, ax = plt.subplots(figsize=(10,10))
    
    
# `objective` / `subjective` are rebound from the data lists to the containers
# returned by errorbar.
objective = ax.errorbar(x+0.2, 
             objective, 
             yerr=dyy,
             fmt='^', ms = 12,color = 'b', ecolor='b',elinewidth=3,label= 'Objective(15 counties)',
            capsize=1,barsabove=2)
ax.errorbar(x+0.35, 
             total_objective, 
             yerr=dy,
             fmt='.', ms = 16,color = 'b', ecolor='b',elinewidth=3,label= 'Objective(52 counties)',
            )
subjective = ax.errorbar(x+0.8, 
             subjective, 
             yerr=dzz,
             fmt='v', ms = 12,color = 'r', ecolor='r',elinewidth=3,label= 'Subjective(15 counties)',
            capsize=1,barsabove=2)
ax.errorbar(x+0.95, 
             total_subjective, 
             yerr=dz,
             fmt='x', ms = 12,color = 'r', ecolor='r',elinewidth=3,label= 'Subjective(52 counties)',
            capsize=1,barsabove=2)

ax.set_ylabel('Percentage Ratio [-]', fontsize =25)
#ax.set_xticklabels(label,fontsize = 1)
#ax.tick_params(labelsize=25)
ax.grid(axis='y',ls=':',lw=3)
ax.legend( mode='expand',loc='lower center', labelspacing=1,fontsize=18,markerscale=1, ncol=2,borderaxespad=0.6,
         handletextpad=0.2,borderpad=0.4, framealpha=0.1,columnspacing=1)
#ax.legend(loc='upper right',  bbox_to_anchor=(0.15, 1),fontsize=15,markerscale=0.9)
# Fix the y tick positions first, then set the tick label font size.
ax.set_yticks((0.6,0.8,1.0,1.2,1.4,1.6))
plt.text(0.45,0.55,'C-2020',fontsize=25)
plt.yticks(fontsize=25)
# A near-zero font size effectively hides the numeric x tick labels; the year
# names are written with plt.text instead.
plt.xticks(fontsize=0.002)
plt.text(1.45,0.55,'C-2021',fontsize=25)
#plt.axvline(x=0.7, color='grey', linestyle='-')
# Vertical separator between the two years and a reference line at ratio 1.
plt.axvline(x=1.1, color='grey', linestyle='-')
plt.axhline(y=1,lw=2., color='black', linestyle='-')
#plt.savefig('../Figure4b_subjectivity.eps')














